/*******************************************************************************
Project:		Wealth -- Smith, Zidar, and Zwick
Last modified: 	2021-02-05
Modified by:	Dustin Swonder
Description:	This file builds an SCF microfile to be used broadly for the 
				paper and slides. Wealth and income concepts all in current USD.
				This file was originally named build_scf_microfile.do in 
				replication20200421
*******************************************************************************/

* Start from an empty session: drop matrices, Mata objects, and any data in
* memory, then raise the limit on the number of variables to 6000.
clear matrix
clear mata
clear
set maxvar 6000

/*******************************************************************************
	(0) Pull latest vintage of bulletin data and replicate weights from online
*******************************************************************************/

/* Don't need to run this every time.

capture !mkdir $rawdir/bulletin_data_fed2019vintage
cd $rawdir/bulletin_data_fed2019vintage

capture rm README.txt

forv year = 1989(3)2019 {
	copy https://www.federalreserve.gov/econres/files/scfp`year's.zip scfp`year's.zip, replace

	unzipfile scfp`year's.zip, replace 
	rm scfp`year's.zip
}

!echo "Last downloaded $S_DATE $S_TIME by Dustin Swonder" >> README.txt */

/*******************************************************************************
	(1) Load in raw data
*******************************************************************************/

forv year = 1989(3)2019 { // cycle through data files

	/***************************************************************************
		(1.1) Load in desired variables from summary files
	***************************************************************************/

	* Open the summary extract for this survey year
	use "$rawdir/bulletin_data_fed2019vintage/rscfp`year'.dta", clear

	* The identifier pair is named x1/xx1 in the 1989 file and y1/yy1 in all
	* later files. Record the (upper-case) names in locals for later steps and
	* upper-case the variables themselves.
	if `year' == 1989 {
		local unitid "XX1"
		local identifier "X1"
		rename (x1 xx1) (X1 XX1)
	}
	else {
		local unitid "YY1"
		local identifier "Y1"
		rename (y1 yy1) (Y1 YY1)
	}

	* Keep only the identifiers and the summary variables used below
	keep `unitid' `identifier' networth wgt age actbus annuit asset bond bus bussefarminc ///
		call cashli ccbal cds comutf equity gbmutf govtbnd houses income install irakh ///
		kginc liq married mmda mmmf mrthel nmmf nnresre nonactbus notxbnd obnd odebt omutf ///
		oresre othloc othma penacctwd resdbt reteq retqliq savbnd saving ssretinc stmutf ///
		stocks tfbmutf trusts vehic veh_inst

	* The raw file has no survey-year variable, so create one
	quietly generate year = `year'

	/***************************************************************************
		(1.2) Merge summary file with new variables we've constructed to full 
				raw file to make variables we can't construct without full 
				variables
	***************************************************************************/

	* Full-file variables whose availability depends on the survey year
	if `year' < 1998 {
		local addtlvars "X3947 X3631"
	}
	else if `year' < 2004 {
		local addtlvars "X6826 X6841 X3631"
	}
	else {
		local addtlvars "X6581 X6582 X6591 X6592 X6551 X6559 X6567 X6552 X6560"
		local addtlvars "`addtlvars' X6568 X6553 X6561 X6569 X6554 X6562 X6570 X6756 X6757"
		local addtlvars "`addtlvars' X6758 X6556 X6564 X6572"
	}

	* Full-file variables pulled in every survey year
	local corevars "X721 X5702 X5704 X5706 X5708 X5710 X5712 X5714 X5716 X5718 X5724"
	local corevars "`corevars' X5306 X5307 X5311 X5312"

	* Two-digit year used in the full-file name (p89i6, ..., p19i6)
	local yr = substr("`year'", 3, 2)

	if `year' < 2019 {
		* Every record must match between summary and full file
		quietly merge 1:1 `identifier' using "$rawdir/complete_data/p`yr'i6.dta", ///
			keepusing(`corevars' `addtlvars') assert(3) nogen
	}
	else {
		* The 2019 full file is merged on lower-case names: lower-case the
		* key and the requested variable names, merge, then restore upper case
		rename Y1 y1
		local corevars_lc = lower("`corevars'")
		local addtlvars_lc = lower("`addtlvars'")

		quietly merge 1:1 y1 using "$rawdir/complete_data/p`yr'i6.dta", ///
			keepusing(`corevars_lc' `addtlvars_lc') assert(3) nogen

		rename y1 x*, upper
	}

		/***********************************************************************
			(1.2.1) Create/rename variables of interest using full file 
				variables
		***********************************************************************/

			/*******************************************************************
				(1.2.1.1) Rename variables which are good-to-go as-is
			*******************************************************************/

	* These full-file variables are used as they are; only the names change
	rename X721 realestatetax
	rename X5706 intexm
	rename X5708 intinc
	rename X5710 divinc
	rename X5724 othinc
	quietly replace realestatetax = max(0, realestatetax) // floor at zero

			/*******************************************************************
				(1.2.1.2) Generate Social Security income variables from 
					frequency and amount variables re: SS income streams #1 and
					#2
			*******************************************************************/

	* Annualize both Social Security payment streams. The frequency code
	* (X5307 for stream 1, X5312 for stream 2) picks the multiplier applied to
	* the amount (X5306, X5311): 3 -> 26, 4 -> 12, 5 -> 4, 6 -> 1, 12 -> 6.
	* Any other frequency code contributes zero.
	quietly generate ssinc = ///
		X5306 * cond(X5307 == 3, 26, cond(X5307 == 4, 12, cond(X5307 == 5, 4, ///
			cond(X5307 == 6, 1, cond(X5307 == 12, 6, 0))))) ///
		+ X5311 * cond(X5312 == 3, 26, cond(X5312 == 4, 12, cond(X5312 == 5, 4, ///
			cond(X5312 == 6, 1, cond(X5312 == 12, 6, 0)))))

			/*******************************************************************
				(1.2.1.3) Generate an AGI concept following 
					https://www.minneapolisfed.org/research/sr/sr578.pdf
			*******************************************************************/

	* X5712 with values below -3000 replaced by -3000
	quietly generate kginc_botcensor = X5712
	quietly replace kginc_botcensor = -3000 if X5712 < -3000

	* AGI approximation: row sum of the components; missing only when every
	* component is missing
	egen agi = rowtotal(X5702 X5704 intinc divinc kginc_botcensor ///
		X5714 X5716 X5718 othinc), missing

	* Remaining X5* inputs and the helper are no longer needed
	drop kginc_botcensor X5*

	/***************************************************************************
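		(1.3) Get variables in nominal terms for everything; need to undo 
				inflation adjustment for summary file variables, which are all
				in the base-year dollars of the bulletin vintage (presumably 
				2019 for the fed2019vintage files used here; an earlier version
				of this note said 2016 -- TODO confirm)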
	***************************************************************************/

	* Bring in the adjustment factors for this survey year; every SCF record
	* must find a match, and factor rows for other years are discarded
	quietly merge m:1 year using $dtadir/scf2019infladjfactors.dta, ///
		assert(2 3) keep(3) nogen

	* Summary-file wealth and debt variables: divide by the asset factor
	local asset_deflated saving tfbmutf govtbnd annuit oresre resdbt ///
		mmda gbmutf obnd trusts nnresre ccbal mmmf comutf bond othma bus install ///
		call omutf irakh equity actbus odebt liq nmmf retqliq reteq asset networth ///
		cds stocks savbnd vehic mrthel stmutf notxbnd cashli houses othloc nonactbus ///
		veh_inst
	foreach wvar of local asset_deflated {
		quietly replace `wvar' = `wvar' / assetadjfactor
	}

	* Summary-file income variables: divide by the income factor
	local income_deflated bussefarminc income kginc ssretinc penacctwd
	foreach ivar of local income_deflated {
		quietly replace `ivar' = `ivar' / incadjfactor
	}

	* The factors themselves are not kept
	drop *adjfactor

	/***************************************************************************
		(1.4) Make main wealth  aggregates using both bulletin wealth concepts 
			and full file-derived variables
	***************************************************************************/

		/***********************************************************************
			(1.4.0) Net worth minus vehicular wealth
		***********************************************************************/

	* Summary-file net worth with the value of vehicles taken out
	quietly generate networth_novehic = networth - vehic

		/***********************************************************************
			(1.4.1) Fixed income
		***********************************************************************/

			/*******************************************************************
				(1.4.1.1) Allocate part of non-money market mutual funds to 
					equity and part to fixed income assets
			********************************************************************/

	* Fixed-income part of non-money-market mutual funds: everything except
	* stock funds and half of combination funds
	quietly generate nmmf_fixed = nmmf - stmutf - (comutf / 2)

			/*******************************************************************
				(1.4.1.2) Allocate part of other managed assets to equity, and 
					part to fixed income. For survey years 2004 onward, display
					the mean annuity and trust equity shares (for draft appendix).
			*******************************************************************/

	if `year' >= 2004 {
		* Equity share of annuities (X6581, X6582) and of trusts (X6591, X6592):
		* code 1 -> share of 1; codes 3 or 30 -> second variable / 10000,
		* floored at zero; any other code -> 0
		quietly generate annuit_equshare = 0
		quietly replace annuit_equshare = 1 if X6581 == 1
		quietly replace annuit_equshare = max(0, X6582) / 10000 if inlist(X6581, 3, 30)

		quietly generate trusts_equshare = 0
		quietly replace trusts_equshare = 1 if X6591 == 1
		quietly replace trusts_equshare = max(0, X6592) / 10000 if inlist(X6591, 3, 30)

		* Weighted mean share among records with positive holdings
		quietly summarize annuit_equshare [aw = wgt] if annuit > 0 & !missing(annuit), meanonly
		display "`year' mean annuities equity share: " `r(mean)'

		quietly summarize trusts_equshare [aw = wgt] if trusts > 0 & !missing(trusts), meanonly
		display "`year' mean trusts equity share: " `r(mean)'

		quietly generate othma_equity = annuit * annuit_equshare + trusts * trusts_equshare
	}
	else if `year' >= 1998 {
		* Share from a single code per asset (X6826 annuities, X6841 trusts):
		* 1 -> 1; 5 or 6 -> 0.5; -7 -> 0.3; any other code -> 0
		quietly generate annuit_equshare = (X6826 == 1) + 0.5 * inlist(X6826, 5, 6) ///
			+ 0.3 * (X6826 == -7)
		quietly generate trusts_equshare = (X6841 == 1) + 0.5 * inlist(X6841, 5, 6) ///
			+ 0.3 * (X6841 == -7)

		quietly generate othma_equity = annuit * annuit_equshare + trusts * trusts_equshare
	}
	else {
		* Before 1998 one code (X3947) covers all other managed assets, with the
		* same mapping: 1 -> 1; 5 or 6 -> 0.5; -7 -> 0.3; any other code -> 0
		quietly generate othma_equshare = (X3947 == 1) + 0.5 * inlist(X3947, 5, 6) ///
			+ 0.3 * (X3947 == -7)

		quietly generate othma_equity = othma * othma_equshare
	}

	* Fixed-income part of trusts. From 2004 on it is the non-equity remainder
	* implied by trusts_equshare (floored at zero); for earlier years it is
	* set to half of trusts.
	if `year' >= 2004 {
		qui gen trusts_fixed_sz = max(0, trusts * (1 - trusts_equshare))

		* The fixed and equity parts must add back up to trusts, up to rounding
		* error. abs() has to wrap only the difference: with "< 10" inside the
		* parentheses the check reduced to abs(<0/1 comparison>) and could not
		* detect a large negative discrepancy.
		assert abs(trusts - trusts_fixed_sz - (trusts * trusts_equshare)) < 10
	}
	else {
		qui gen trusts_fixed_sz = trusts * 0.5
	}

	* The share variables were only inputs to the allocations above
	drop *_equshare

	* Fixed-income remainder of other managed assets, floored at zero.
	* othma_equity is written out in full: the abbreviation othma_eq only
	* resolved because the *_equshare variables had just been dropped, and it
	* fails outright under "set varabbrev off".
	qui gen othma_fixed = max(0, othma - othma_equity)

	assert abs(othma - othma_fixed - othma_equity) < 10 // Ensure the two sum to othma s.t. rounding error

			/*******************************************************************
				(1.4.1.3) Generate fixed income 
			*******************************************************************/

	* Preferred taxable fixed-income measure: liquid assets, CDs, the
	* fixed-income parts of mutual funds and other managed assets, savings
	* bonds and bonds, net of tax-exempt bonds and tax-free bond funds
	quietly generate taxbond_preferred = ///
		liq + cds + nmmf_fixed + othma_fixed + savbnd + bond - notxbnd - tfbmutf

	* nmmf_fixed was only an input to taxbond_preferred
	drop nmmf_fixed

		/***********************************************************************
			(1.4.2) IRA-Keogh assets: divide into equity and non-equity 
				components
		***********************************************************************/

/* 	if `year' >= 2004 {
		foreach irakhvar of varlist X6551 X6559 X6567 X6552 X6560 X6568 X6553 X6561 ///
			X6569 X6554 X6562 X6570 X6756 X6757 X6758 X6556 X6564 X6572 {
			di "230"
			qui replace `irakhvar' = max(`irakhvar', 0) // Bottom-code as zero
		}

		qui egen irakh_r = rowtotal(X6551 X6552 X6553 X6554) // respondent
		qui egen irakh_sp = rowtotal(X6559 X6560 X6561 X6562) // spouse
		qui egen irakh_ofm = rowtotal(X6567 X6568 X6569 X6570) // other family member

		gen irakh_check = irakh_r + irakh_sp + irakh_ofm

		assert inrange(irakh / irakh_check, 0.9999, 1.0001) | irakh == irakh_check

		di "242"
		assert irakh_r == X6756
		assert irakh_sp == X6757
		assert irakh_ofm == X6758
		drop X6756 X6757 X6758
		di "247"
		if `year' >= 2013 {
			qui gen equcmpt_irakh_r = irakh_r * (X6555 == 1) + ///
								  irakh_r * (inlist(X6555, 3, 30)) * (X6556 / 10000)

			qui gen equcmpt_irakh_sp = irakh_sp * (X6563 == 1) + ///
								   irakh_sp * (inlist(X6563, 3, 30)) * (X6564 / 10000)

			qui gen equcmpt_irakh_ofm = irakh_ofm * (X6571 == 1) + ///
								    irakh_ofm * (inlist(X6571, 3, 30)) * (X6572 / 10000)
		}
		else {
			qui gen equcmpt_irakh_r = irakh_r * (X6555 == 1) + irakh_r * (X6555 == 3) * (X6556 / 10000)
			qui gen equcmpt_irakh_sp = irakh_sp * (X6563 == 1) + irakh_sp * (X6563 == 3) * (X6564 / 10000)
			qui gen equcmpt_irakh_ofm = irakh_ofm * (X6571 == 1) + irakh_ofm * (X6571 == 3) * (X6572 / 10000)
		}
		di "263"
		qui egen irakh_equity = rowtotal(equcmpt_irakh_*)
		drop equcmpt_irakh_*
	}
	else {
		qui replace X3631 = max(0, X3631) // Bottom-code as zero
		#delimit ;
		qui gen irakh_equity = cond(!inlist(X3631, 2, 4, 5, 6), 0,
						   	   cond(X3631 == 2, irakh,
						       cond(inlist(X3631, 5, 6), irakh * 0.5, irakh * 0.3)));
		#delimit cr
		drop X3631
	}
	
	assert irakh_equity <= irakh | inrange(irakh / irakh_equity, 0.9999, 1.0001) */

	* The year-specific full-file variables are no longer needed
	drop `addtlvars'

		/***********************************************************************
			(1.4.3) Equity: stocks, stock mutual funds, half of combination 
				mutual funds, and the equity part of annuities and trusts
		***********************************************************************/

	quietly generate stockw_preferred = stocks + stmutf + (comutf / 2) + othma_equity

		/***********************************************************************
			(1.4.4) Private business assets: business interests plus 
				non-residential real estate
		***********************************************************************/

	quietly generate busw_preferred = bus + nnresre

		/***********************************************************************
			(1.4.5) Housing assets: primary and other residential real estate 
				net of the debt secured by them
		***********************************************************************/

	quietly generate hwhou = (houses + oresre) - (mrthel + resdbt)

	/***************************************************************************
		(1.5) Make PSZ-style wealth aggregates
	***************************************************************************/

		/***********************************************************************
			(1.5.1) SZ 2016
		***********************************************************************/

	* Taxable interest-bearing assets (= intinc_wscf in PSZ use_scf.do)
	quietly generate intinc_wscf = liq + cds + savbnd + bond - notxbnd + nmmf ///
		- stmutf - tfbmutf - .5*comutf

	* Tax-exempt interest-bearing assets (= intexm_wscf in PSZ use_scf.do)
	quietly generate intexm_wscf = notxbnd + tfbmutf

	* Equity assets: stocks, stock funds, and half of combination funds
	quietly generate kgdivinc_wscf = stocks + stmutf + 0.5 * comutf

		/***********************************************************************
			(1.5.2) SZ 2020 Revisionists (August 2020 version)
		***********************************************************************/

	* Taxable interest-bearing assets, SZ 2020 definition. From 2004 onward the
	* fixed-income part of trusts is the computed allocation trusts_fixed_sz;
	* for earlier years half of trusts is assumed. The year test used to read
	* "year < 2004", which made both branches equal to 0.5 * trusts (before
	* 2004 trusts_fixed_sz is itself trusts * 0.5) and left the computed
	* allocation unused. The variable name is also written out in full rather
	* than relying on abbreviation.
	* NOTE(review): this changes values for 2004 onward -- confirm it matches
	* the intended SZ 2020 definition.
	qui gen intinc_wscf_szrev = cond(year >= 2004, ///
		saving + mmda + call + cds + savbnd + (bond - notxbnd) + trusts_fixed_sz, ///
		saving + mmda + call + cds + savbnd + (bond - notxbnd) + 0.5 * trusts)

	/***************************************************************************
		(1.6) Make income measures
	***************************************************************************/

	* Pension income: ssretinc less the Social Security income computed
	* above, floored at zero; the two inputs are then dropped
	quietly generate peninc = max(0, ssretinc - ssinc)
	quietly drop ssretinc ssinc

	/***************************************************************************
		(1.7) Save as tempfile
	***************************************************************************/

	* One tempfile per survey year, appended together after the loop
	tempfile file`year'
	quietly save `file`year''
}

/*******************************************************************************
	(2) Append data together and clean up a bit
*******************************************************************************/

clear

* Stack the per-year tempfiles in chronological order
foreach year of numlist 1989(3)2019 {
	append using `file`year''
}

* The 1989 records carry their identifiers in XX1/X1 and have YY1/Y1 empty;
* verify that, then copy them over so YY1/Y1 identify records in every year
assert missing(YY1) if year == 1989
assert missing(Y1) if year == 1989
assert !missing(XX1) if year == 1989
assert !missing(X1) if year == 1989
quietly replace YY1 = XX1 if year == 1989
quietly replace Y1 = X1 if year == 1989
drop XX1 X1

* Shrink storage types where possible before saving
compress

save $dumpdir/scf_nodb_noformatting.dta, replace

/*******************************************************************************
	(3) Merge in Sabelhaus-Henriques Volz pensions and create net total db  
		pensions measure and new net worth measures including DB. Display 2016 
		total of each of Sabelhaus-Henriques Volz variables for draft appendix.
*******************************************************************************/

* The merge below keys on lower-case y1, so rename for the merge and restore
* the upper-case name afterwards. Every record must match.
rename Y1 y1
merge 1:1 y1 year using "$litroot/sabelhaus/DBwealthFiles2019update/db.wealth/DB_household092820.dta", ///
	assert(3) keepusing(*_dbamt_*) nogen
rename y1 Y1

* Total DB pension amount: row sum of all *_dbamt_* components
quietly egen tot_pen_db = rowtotal(*_dbamt_*)

* Print r(sum) from a weighted summarize of each component for 2016
unab dbcomponents : *_dbamt_*
foreach dbvar of local dbcomponents {
	quietly summarize `dbvar' [aw = wgt] if year == 2016, meanonly

	display "2016 value `dbvar': " `r(sum)'
}
quietly drop *_dbamt_*

* Net worth measures including DB pensions; the no-vehicle measure without DB
* is only an intermediate and is dropped
quietly generate networth_db = networth + tot_pen_db
quietly generate networth_db_novehic = networth_novehic + tot_pen_db
drop networth_novehic

/*******************************************************************************
	(4) Rank by base net worth measure, preferred net worth measure, and 
		taxable interest income
*******************************************************************************/

* Weighted cumulative-distribution ranks within each survey year. The i-th
* source variable below is ranked into the i-th rank variable.
local rank_sources networth networth_db_novehic intinc
local rank_targets raw_rank base_rank intinc_rank

forvalues i = 1/3 {
	local src : word `i' of `rank_sources'
	local dst : word `i' of `rank_targets'
	quietly cumul `src' [aw = wgt], gen(`dst') by(year)
}

/*******************************************************************************
	(5) Clean up, label, sort, and save data
*******************************************************************************/

* Remove leftover temporary variables, if any exist
capture drop __000*

* Column order: keys, weight, ranks, and net worth first, then every other
* variable in alphabetical order
quietly ds year YY1 Y1 wgt *_rank networth, not
local remaining `r(varlist)'
order `remaining', alphabetic last
order year YY1 Y1 wgt *_rank networth

* Row order: survey year, then the two identifiers
sort year YY1 Y1

* Variable labels, then the final save. Changes relative to the earlier
* version of this section:
*   - trusts_fixed_sz is referenced by its full name instead of the
*     abbreviation trusts_fixed, and its label now says pre-2004, the cut-off
*     the code uses for the 50% assumption
*   - raw_rank gets a label, like the other *_rank variables
*   - the output path is quoted, as the input paths are
lab var age "Age of the household head (top-coded 95, bot-coded 17)"
lab var actbus "Actively managed business"
lab var agi "Adjusted Gross Income (approximation)"
lab var asset "Total Assets from SCF summary files"
lab var annuit "Annuities"
lab var base_rank "Rank among SCF by net worth + db - vehic"
lab var bond "Total Bonds, Excl. Bond Funds and Savings Bonds"
lab var bus "Business Interests (bulletin)"
lab var bussefarminc "Business/farm income"
lab var busw_preferred "Preferred priv biz wealth (bus + nnresre)"
lab var call "Call accounts"
lab var cashli "Cash Value of Whole Life Insurance"
lab var ccbal "Credit Card Balance"
lab var cds "CDs"
lab var kgdivinc_wscf "Equity assets, excl. pensions"
lab var comutf "Combination and Other Mutual Funds"
lab var equity "Equity bulletin concept"
lab var gbmutf "Government Bond Mutual Funds"
lab var govtbnd "Government and government agency bonds"
lab var houses "Value of Primary Residence"
lab var hwhou "Housing and res. real estate net of mortgages"
lab var income "Total Income"
lab var install "Other Installment Debt"
lab var intexm "Non-taxed Interest Income"
lab var intexm_wscf "Non-taxed Interest Assets"
lab var intinc "Taxable Interest Income"
lab var intinc_rank "Rank among SCF by taxable interest income"
lab var intinc_wscf "Taxable Interest Assets (SZ 2016)"
lab var intinc_wscf_szrev "Taxable Interest Assets (SZ 2020)"
lab var kginc "Income from capital gains"
lab var liq "Total Liquid Assets from SCF summary files"
lab var married "=1 married/living w partner =2 not married/living w partner"
lab var mmda "Money market deposit accounts"
lab var mmmf "Money market mutual funds"
lab var mrthel "Debt Secured by Primary Real Estate"
lab var networth "Net worth from SCF summary files"
lab var networth_db "Net worth bulletin + Sabelhaus-Henriques DB"
lab var networth_db_novehic "Net worth + Sabelhaus-Henriques DB - vehic"
lab var nmmf "Total Directly-Held Mutual Funds, Excl. MMMFs"
lab var nnresre "Non-residential real estate"
lab var nonactbus "Business not actively managed"
lab var notxbnd "Tax-exempt bonds (state + local bonds)"
lab var obnd "Corporate and foreign bonds"
lab var odebt "Other debt"
lab var omutf "Other mutual funds"
lab var oresre "Other residential real estate"
lab var othma "Other managed assets (Annuities + Trusts)"
lab var othma_equity "Other managed assets in Equity (Bricker)"
lab var othma_fixed "Other managed assets in fixed inc. (Bricker)"
lab var penacctwd "Pension account withdrawal (DC income)"
lab var peninc "Pension Income, incl. pen acct withdrawal"
lab var raw_rank "Rank among SCF by net worth from summary files"
lab var realestatetax "Taxes paid on real estate"
lab var resdbt "Other Residential Debt"
lab var retqliq "Total quasi-liquid: IRAs, thrift, future pensions"
lab var savbnd "Savings Bonds"
lab var saving "Savings account balance not in MMA or sweep accounts"
lab var stmutf "Stock Mutual Funds"
lab var stocks "Stocks"
lab var stockw_preferred "Preferred equity defn"
lab var taxbond_preferred "Preferred taxable fixed inc. defn"
lab var tot_pen_db "Sabelhaus-Henriques Volz DB pension total"
lab var tfbmutf "Tax-free Bond Mutual Funds"
lab var trusts "Trusts"
lab var trusts_fixed_sz "Trusts invested in fixed claims (assumed 50% pre-2004)"
lab var vehic "Value of Vehicles"
lab var veh_inst "Vehicle loans"

save "$dtadir/scfplus_slim.dta", replace